#! /usr/bin/python3
# -*- coding: utf-8 -*-
# __author__ = "Fengxuewei"
# Date: 2020/6/20


from bs4 import BeautifulSoup
import requests


# Scrape one attraction listing page and print a record per attraction.
# Each record holds the title text, the thumbnail image URL and the list of
# category strings found in the listing entry.
url = 'https://www.tripadvisor.cn/Attractions-g60763-Activities-New_York_City_New_York.html#ATTRACTION_SORT_WRAPPER'

# requests has no default timeout; without one a stalled connection would
# block the script forever.
wb_data = requests.get(url, timeout=10)
# Fail loudly on 4xx/5xx responses instead of silently parsing an error page.
wb_data.raise_for_status()

soup = BeautifulSoup(wb_data.text, 'lxml')

# Three independent selections over the same document.
titles = soup.select('div.listing_title > a')
imgs = soup.select('img[width="180"]')
cates = soup.select('div.poi > div > div:nth-child(4)')

# The selections are paired purely by position; zip() stops at the shortest
# list, so if one selector matches fewer elements the trailing items of the
# others are dropped.
for title, img, cate in zip(titles, imgs, cates):
    data = {
        'title': title.get_text(),
        'img': img.get('src'),
        # stripped_strings yields each text fragment with surrounding
        # whitespace removed; materialize it so it prints as a list.
        'cate': list(cate.stripped_strings),
    }
    print(data)

# Reference CSS selectors (notes only; not used by the code above):
#   #ATTR_ENTRY_267031 > div > div > div > div.photo_booking.non_generic
#   #ATTR_ENTRY_267031 > div > div > div > div.photo_booking.non_generic > a